This document takes the QCd, normalized, scaled data and performs clustering.
#Install and load required R packages. Install the packages if you do not already have them installed
library(Seurat)
library(kableExtra)
library(ggplot2)
library(plotly)
library(tidyverse)
library(rmarkdown)
# Input path
datadirectory <- "/Users/nagarajanv/OneDrive - National Institutes of Health/Working/scRNAseqNilisha/"
# Set working directory
setwd(datadirectory)
# Load QCd data from current working directory
load('SeuratObjectAfterNormalization.RData')
##################### PCA
# Run PCA with 50 pcs
WT_TG_Filt_Scaled <- RunPCA(object = WT_TG_Filt_Scaled, npcs=50)
# Print genes in the top 5 pcs
print(WT_TG_Filt_Scaled[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1
## Positive: Cybb, Spi1, Csf1r, Clec4a3, Clec4a1
## Negative: AW112010, Trbc2, Ms4a4b, Nkg7, Il2rb
## PC_ 2
## Positive: Il1rl1, Lmo4, Hes1, Lpcat2, Rnf128
## Negative: Cd79a, Ms4a1, Ebf1, Cd79b, Ly6d
## PC_ 3
## Positive: Ms4a4b, Nkg7, Ccl5, Cd3g, Trac
## Negative: Cd79a, Ebf1, Ms4a1, Ly6d, Cd79b
## PC_ 4
## Positive: Ccl5, Nkg7, Zeb2, Ms4a4b, Cx3cr1
## Negative: Wfdc21, Mmp9, Cd177, Itgb2l, Lcn2
## PC_ 5
## Positive: Klrb1c, Ncr1, Fcer1g, Tyrobp, Styk1
## Negative: Cd3g, Ifi27l2a, Ly6a, Ifit3, Ifit1
# Identify number of PCs that explains majority of variations
ElbowPlot(WT_TG_Filt_Scaled, ndims=50, reduction = "pca")
# Visualize the genes in the first PC
VizDimLoadings(WT_TG_Filt_Scaled, dims = 1, ncol = 1) + theme_minimal(base_size = 8)
# Visualize the genes in the second PC
VizDimLoadings(WT_TG_Filt_Scaled, dims = 2, ncol = 1) + theme_minimal(base_size = 8)
# Set the identity column to WT vs TG
Idents(WT_TG_Filt_Scaled) <- "orig.ident"
# Visualize the cells after pca
DimPlot(object = WT_TG_Filt_Scaled, reduction = "pca")
# Plot heatmap with cells=500 plotting cells with extreme cells on both ends of spectrum
#DimHeatmap(object = WT_TG_Filt_Scaled, dims = 1:6, cells = 500, balanced = TRUE)
DimHeatmap(object = WT_TG_Filt_Scaled, dims = 2, cells = 500, balanced = TRUE)
##################### Clustering
# use the first 20 pc's
use.pcs = 1:20
# FindNeighbors with 20 pcs
WT_TG_Filt_Scaled <- FindNeighbors(WT_TG_Filt_Scaled, reduction="pca", dims = use.pcs)
# FindClusters with resolution starting at 0.25 and ending at 4, with 0.5 increment
WT_TG_Filt_Scaled <- FindClusters(
object = WT_TG_Filt_Scaled,
resolution = seq(0.25,4,0.5),
verbose = FALSE
)
head(WT_TG_Filt_Scaled)
## orig.ident nCount_RNA nFeature_RNA percent.mito
## AAACCCATCAACACCA-WT WT 7477 2388 1.297312
## AAACGAAAGCTCTGTA-WT WT 8471 2699 2.290166
## AAACGAACAGCTCTGG-WT WT 9502 2658 2.694170
## AAAGGATCATGCCGAC-WT WT 14984 2804 4.378003
## AAAGGGCAGGCCTAGA-WT WT 10101 2653 2.910603
## AAAGTCCGTTCTTGCC-WT WT 4780 1711 3.221757
## AAATGGAAGCTACTAC-WT WT 6593 2292 1.956621
## AAATGGACAAGGTCAG-WT WT 8548 2884 3.263921
## AACAAAGGTGTCACAT-WT WT 7765 2287 2.974887
## AACAAGATCCTTCGAC-WT WT 8894 2637 2.900832
## percent.ribo percent.hb RNA_snn_res.0.25 RNA_snn_res.0.75
## AAACCCATCAACACCA-WT 19.43293 0.00000000 7 8
## AAACGAAAGCTCTGTA-WT 17.91996 0.00000000 2 3
## AAACGAACAGCTCTGG-WT 33.86655 0.00000000 0 0
## AAAGGATCATGCCGAC-WT 38.70796 0.01334757 3 2
## AAAGGGCAGGCCTAGA-WT 32.60073 0.00000000 1 1
## AAAGTCCGTTCTTGCC-WT 30.69038 0.00000000 0 0
## AAATGGAAGCTACTAC-WT 17.54892 0.00000000 7 8
## AAATGGACAAGGTCAG-WT 15.65278 0.00000000 5 7
## AACAAAGGTGTCACAT-WT 31.59047 0.00000000 0 0
## AACAAGATCCTTCGAC-WT 26.14122 0.00000000 5 7
## RNA_snn_res.1.25 RNA_snn_res.1.75 RNA_snn_res.2.25
## AAACCCATCAACACCA-WT 10 15 17
## AAACGAAAGCTCTGTA-WT 12 14 16
## AAACGAACAGCTCTGG-WT 0 0 0
## AAAGGATCATGCCGAC-WT 3 7 7
## AAAGGGCAGGCCTAGA-WT 2 1 2
## AAAGTCCGTTCTTGCC-WT 5 2 5
## AAATGGAAGCTACTAC-WT 10 15 17
## AAATGGACAAGGTCAG-WT 9 8 8
## AACAAAGGTGTCACAT-WT 1 2 6
## AACAAGATCCTTCGAC-WT 9 8 8
## RNA_snn_res.2.75 RNA_snn_res.3.25 RNA_snn_res.3.75
## AAACCCATCAACACCA-WT 19 19 20
## AAACGAAAGCTCTGTA-WT 18 18 19
## AAACGAACAGCTCTGG-WT 7 9 7
## AAAGGATCATGCCGAC-WT 6 5 4
## AAAGGGCAGGCCTAGA-WT 1 0 0
## AAAGTCCGTTCTTGCC-WT 5 6 10
## AAATGGAAGCTACTAC-WT 19 19 20
## AAATGGACAAGGTCAG-WT 9 8 6
## AACAAAGGTGTCACAT-WT 13 14 11
## AACAAGATCCTTCGAC-WT 9 8 6
## seurat_clusters
## AAACCCATCAACACCA-WT 20
## AAACGAAAGCTCTGTA-WT 19
## AAACGAACAGCTCTGG-WT 7
## AAAGGATCATGCCGAC-WT 4
## AAAGGGCAGGCCTAGA-WT 0
## AAAGTCCGTTCTTGCC-WT 10
## AAATGGAAGCTACTAC-WT 20
## AAATGGACAAGGTCAG-WT 6
## AACAAAGGTGTCACAT-WT 11
## AACAAGATCCTTCGAC-WT 6
# Count number of clusters at each resolution
sapply(grep("res",colnames(WT_TG_Filt_Scaled@meta.data),value = TRUE),
function(x) length(unique(WT_TG_Filt_Scaled@meta.data[,x])))
## RNA_snn_res.0.25 RNA_snn_res.0.75 RNA_snn_res.1.25 RNA_snn_res.1.75
## 9 11 16 20
## RNA_snn_res.2.25 RNA_snn_res.2.75 RNA_snn_res.3.25 RNA_snn_res.3.75
## 22 24 25 26
####################### TSNE
# Run tsne with 20 pcs
WT_TG_Filt_Scaled <- RunTSNE(
object = WT_TG_Filt_Scaled,
reduction.use = "pca",
dims = use.pcs,
do.fast = TRUE)
# Plot all clusters in all resolutions
DimPlot(object = WT_TG_Filt_Scaled, group.by=grep("res",colnames(WT_TG_Filt_Scaled@meta.data),value = TRUE)[1:4], ncol=2 , pt.size=3.0, reduction = "tsne", label = T)
# change default identity
Idents(WT_TG_Filt_Scaled) <- "RNA_snn_res.0.75"
# list cell number in each cluster for WT vs TG
table(Idents(WT_TG_Filt_Scaled),WT_TG_Filt_Scaled$orig.ident)
##
## TG WT
## 0 380 143
## 1 163 229
## 2 89 68
## 3 90 64
## 4 71 64
## 5 87 30
## 6 64 52
## 7 50 44
## 8 23 39
## 9 27 29
## 10 16 19
# Visualize cluster at 0.75 resolution
DimPlot(object = WT_TG_Filt_Scaled, pt.size=0.5, reduction = "tsne", label = T)
# Color by WT vs TG
DimPlot(object = WT_TG_Filt_Scaled, pt.size=0.5, reduction = "tsne", group.by = "orig.ident" )
########################## UMAP
# Run umap with 20 pcs
WT_TG_Filt_Scaled <- RunUMAP(
object = WT_TG_Filt_Scaled,
reduction.use = "pca",
dims = use.pcs)
# Plot umap
DimPlot(object = WT_TG_Filt_Scaled, pt.size=0.5, reduction = "umap", label = T)
# Plot umap with WT_TG group
DimPlot(object = WT_TG_Filt_Scaled, pt.size=0.5, reduction = "umap", group.by = "orig.ident")
# Visualizations
# expression of variable genes across clusters
# Custom list of genes to visualize
#top10
mygenes = c("Lyz2","Cd74")
RidgePlot(WT_TG_Filt_Scaled, features = mygenes)
#VlnPlot(WT_TG_Filt_Scaled, features = top10)
VlnPlot(WT_TG_Filt_Scaled, features = mygenes)
#VlnPlot(WT_TG_Filt_Scaled, features = mygenes, split.by = "orig.ident")
FeaturePlot(WT_TG_Filt_Scaled, features = mygenes)
#FeaturePlot(WT_TG_Filt_Scaled, features = mygenes, split.by = "orig.ident")
DotPlot(WT_TG_Filt_Scaled, features = mygenes)
DoHeatmap(subset(WT_TG_Filt_Scaled), features = mygenes, size = 3)
VlnPlot(object = WT_TG_Filt_Scaled, features = "percent.mito", pt.size = 0.05)
save.image(file='SeuratObjectAfterClustering.RData')
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] rmarkdown_2.9 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7
## [5] purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.2
## [9] tidyverse_1.3.1 plotly_4.9.4.1 ggplot2_3.3.5 kableExtra_1.3.4
## [13] SeuratObject_4.0.2 Seurat_4.0.3
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 systemfonts_1.0.2
## [4] plyr_1.8.6 igraph_1.2.6 lazyeval_0.2.2
## [7] splines_4.1.0 listenv_0.8.0 scattermore_0.7
## [10] digest_0.6.27 htmltools_0.5.1.1 fansi_0.5.0
## [13] magrittr_2.0.1 tensor_1.5 cluster_2.1.2
## [16] ROCR_1.0-11 globals_0.14.0 modelr_0.1.8
## [19] matrixStats_0.59.0 svglite_2.0.0 spatstat.sparse_2.0-0
## [22] colorspace_2.0-2 rvest_1.0.0 ggrepel_0.9.1
## [25] haven_2.4.1 xfun_0.24 crayon_1.4.1
## [28] jsonlite_1.7.2 spatstat.data_2.1-0 survival_3.2-11
## [31] zoo_1.8-9 glue_1.4.2 polyclip_1.10-0
## [34] gtable_0.3.0 webshot_0.5.2 leiden_0.3.8
## [37] future.apply_1.7.0 abind_1.4-5 scales_1.1.1
## [40] DBI_1.1.1 miniUI_0.1.1.1 Rcpp_1.0.6
## [43] viridisLite_0.4.0 xtable_1.8-4 reticulate_1.20
## [46] spatstat.core_2.2-0 htmlwidgets_1.5.3 httr_1.4.2
## [49] RColorBrewer_1.1-2 ellipsis_0.3.2 ica_1.0-2
## [52] pkgconfig_2.0.3 farver_2.1.0 sass_0.4.0
## [55] uwot_0.1.10 dbplyr_2.1.1 deldir_0.2-10
## [58] utf8_1.2.1 tidyselect_1.1.1 labeling_0.4.2
## [61] rlang_0.4.11 reshape2_1.4.4 later_1.2.0
## [64] munsell_0.5.0 cellranger_1.1.0 tools_4.1.0
## [67] cli_3.0.0 generics_0.1.0 broom_0.7.8
## [70] ggridges_0.5.3 evaluate_0.14 fastmap_1.1.0
## [73] yaml_2.2.1 goftest_1.2-2 knitr_1.33
## [76] fs_1.5.0 fitdistrplus_1.1-5 RANN_2.6.1
## [79] pbapply_1.4-3 future_1.21.0 nlme_3.1-152
## [82] mime_0.11 xml2_1.3.2 compiler_4.1.0
## [85] rstudioapi_0.13 png_0.1-7 spatstat.utils_2.2-0
## [88] reprex_2.0.0 bslib_0.2.5.1 stringi_1.6.2
## [91] highr_0.9 RSpectra_0.16-0 lattice_0.20-44
## [94] Matrix_1.3-3 vctrs_0.3.8 pillar_1.6.1
## [97] lifecycle_1.0.0 spatstat.geom_2.2-0 lmtest_0.9-38
## [100] jquerylib_0.1.4 RcppAnnoy_0.0.18 data.table_1.14.0
## [103] cowplot_1.1.1 irlba_2.3.3 httpuv_1.6.1
## [106] patchwork_1.1.1 R6_2.5.0 promises_1.2.0.1
## [109] KernSmooth_2.23-20 gridExtra_2.3 parallelly_1.26.1
## [112] codetools_0.2-18 MASS_7.3-54 assertthat_0.2.1
## [115] withr_2.4.2 sctransform_0.3.2 mgcv_1.8-35
## [118] parallel_4.1.0 hms_1.1.0 grid_4.1.0
## [121] rpart_4.1-15 Rtsne_0.15 shiny_1.6.0
## [124] lubridate_1.7.10
#https://ucdavis-bioinformatics-training.github.io/2021-March-Single-Cell-RNA-Seq-Analysis/data_analysis/scRNA_Workshop-PART4_fixed